package com.buluo;

import java.util.List;

import org.springframework.stereotype.Component;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic page processor for the "My Tribe" (wodebuluo) section of gamedog.cn.
 *
 * <p>For every downloaded page it queues all links that match the article URL
 * pattern and, when the page has the article layout, publishes the article's
 * title, HTML content and date as result fields.
 *
 * @author guolei
 */
@Component
public class DataPageProcesser  implements PageProcessor  {

    /** Pattern of the article URLs that should be followed by the crawler. */
    private static final String ARTICLE_LINK_REGEX =
            "http://wodebuluo\\.gamedog\\.cn/gonglue/\\d+/\\d+\\.html";

    /** XPath of the container element that wraps a single article. */
    private static final String ARTICLE_XPATH =
            "//div[@class='main1']/div[@class='info_left']/div[@class='wen']";

    /** Crawl settings, built once and handed out by {@link #getSite()}. */
    private final Site site = Site.me()
            .setDomain("wodebuluo.gamedog.cn")
            .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

    /**
     * Queues the article links found on the page and extracts the article fields.
     *
     * <p>The fields {@code title}, {@code content} and {@code date} are always
     * written. If no title could be extracted the page is not an article page,
     * so it is marked as skipped to keep empty results away from the pipelines.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        List<String> articleLinks = page.getHtml().links().regex(ARTICLE_LINK_REGEX).all();
        page.addTargetRequests(articleLinks);

        String title = extract(page, "/h1/text()");
        page.putField("title", title);
        page.putField("content", extract(page, "/div[@class='news_neirong']/html()"));
        page.putField("date", extract(page, "/div[@class='newsa']/span/text()"));

        // Index/list pages have no article heading; the extraction then yields
        // null and there is nothing worth persisting for this page.
        if (title == null) {
            page.setSkip(true);
        }
    }

    /**
     * Returns the crawl settings (domain and user agent) for the target site.
     *
     * @return the site configuration shared by all calls
     */
    @Override
    public Site getSite() {
        return site;
    }

    /** Evaluates an XPath relative to the article container and returns the first match as text. */
    private static String extract(Page page, String relativeXpath) {
        return page.getHtml().xpath(ARTICLE_XPATH + relativeXpath).toString();
    }

}
